******************************************************************************** 
* 					Create main_distance.dta
******************************************************************************** 

// Load distance data from R output.
// The R step wrote one CSV per buffer radius. Each CSV is converted to
// ../temp/distances/cb_<d>.dta, where <d> is the radius in hundredths of a
// mile (5, 10, ..., 200), and the 40 files are then stacked.
cap mkdir ../temp/distances
forval d = 5(5)200 {
	// Rebuild the radius tag used in the CSV file names from <d>:
	//   100, 200          -> "1", "2"
	//   10, 20, ..., 190  -> "0pt1", "0pt2", ..., "1pt9"
	//   5, 15, ..., 195   -> "0pt05", "0pt15", ..., "1pt95"
	local whole = floor(`d'/100)
	local frac  = mod(`d', 100)
	if `frac' == 0 {
		local tag "`whole'"
	}
	else if mod(`frac', 10) == 0 {
		local tenths = `frac'/10
		local tag "`whole'pt`tenths'"
	}
	else {
		local hundredths = string(`frac', "%02.0f")
		local tag "`whole'pt`hundredths'"
	}
	import delimited ../raw/distances/processed/shooting_exposed_cb_`tag'.csv, stringcols(3 6) clear
	save ../temp/distances/cb_`d'.dta,replace
}

// Stack all radii into one file
use ../temp/distances/cb_5.dta, clear
forval d=10(5)200 {
	append using ../temp/distances/cb_`d'.dta
}

// Rename before first use so the commands below refer to the CB id by its
// full name rather than relying on -cb- being an unambiguous abbreviation of
// cb_geoid (which fails under -set varabbrev off-).
ren cb_geoid cb

// Each shooting x CB pair must appear exactly once across all radii
duplicates report sht_id cb
assert r(unique_value)  == r(N) 
sum pct_covered

// cb_shoot = 1 when the shooting's own CB (sht_cb_geoid) is this CB
gen cb_shoot = (sht_cb_geoid==cb)
tab distance,m
tab cb_shoot,m
keep sht_id cb cb_shoot distance pct_covered

// Add shooting variable for CB = "060378003301000"
// note: there is a shooting in this CB before 2008 election, but the CB is 
//		so large that 75% of its area is not within 2 miles of its center
// Duplicate the first row (expand appends the copy as the last observation)
// and overwrite every kept field of that copy.
expand 2 in 1
replace cb          = "060378003301000" in L
replace sht_id      = 8553              in L
replace pct_covered = .                 in L
replace distance    = 0.05              in L
replace cb_shoot    = 1                 in L

// Sanity check: number of shooting x CB rows where the shooting is in the CB
count if cb_shoot == 1
assert r(N) == 694

keep sht_id cb cb_shoot distance pct_covered
save ../data_intermediate/distances_2mi_all.dta, replace

// collapse to cb per election level, merge back shoot vars later
// Build a shooting-level lookup: id, date, election year, media flags, and
// the CB in which the shooting took place.
use ../data_intermediate/merged_shootings_all.dta, clear
sort sht_date elec_yr sht_id
tab ttl_mentions any_mentions, m
rename any_mentions media
rename cb cb_of_shooting
gen nomedia = (media == 0)
tab elec_yr, m
keep sht_id sht_date elec_yr media nomedia cb_of_shooting
save ../temp/shootings_for_distance.dta, replace

// Attach shooting attributes to every shooting x CB distance row; every
// row on both sides is required to match.
use ../data_intermediate/distances_2mi_all.dta, clear
merge m:1 sht_id using ../temp/shootings_for_distance.dta, assert(match) nogenerate
keep if inrange(elec_yr, 2004, 2010)  // keep shootings between 2002 and 2010 elections
order cb elec_yr sht_id cb_of_shooting

// SAVE FOR ALL SHOOTINGS, MEDIA SHOOTINGS, AND NO MEDIA SHOOTINGS
// For each sample, keep one shooting per CB x election year and save it to
// ../data_intermediate/<sample>_shootings.dta. The data in memory is left
// unchanged (preserve/restore).
local cond_all     "1"
local cond_media   "media==1"
local cond_nomedia "media==0"
foreach samp in all media nomedia {
	preserve
	keep if `cond_`samp''
	bys cb elec_yr: egen max_cb_shoot = max(cb_shoot)
	// keep if shooting occurs in CB; if not in CB, then keep closest shooting
	gen no_shoot = (cb_shoot==0)
	// sht_date and sht_id break ties between equally close shootings. Stata
	// orders tied observations arbitrarily, so without them the shooting
	// that is kept could change from run to run.
	bys cb elec_yr (no_shoot distance sht_date sht_id): keep if _n==1
	// the kept row must be an in-CB shooting whenever the CB had one
	assert max_cb_shoot == cb_shoot 
	keep cb cb_of_shooting cb_shoot distance elec_yr sht_date sht_id
	save ../data_intermediate/`samp'_shootings.dta,replace
	restore
}

// clean shootings data
// For each shooting sample, build a CB x election-year panel from the census
// file, attach the kept shooting for each CB-election, and derive the in-CB,
// in-CBG and cumulative-distance exposure variables.
foreach samp in all media nomedia {

	// Expand every census row to five election years: 2002, 2004, ..., 2010
	use ../data_intermediate/main_census.dta, clear
	gen ct = substr(cb, 1, 11)
	gen cbg = substr(cb, 1, 12)
	expand 5
	bysort cb: gen elec_yr = 2000 + 2*_n
	keep cb cbg ct CBG CB CT elec_yr pop_dec sample_main all
	merge 1:1 cb elec_yr using ../data_intermediate/`samp'_shootings.dta, assert(master match) nogenerate
	assert cb_shoot==1 if cb == cb_of_shooting

	// sht_cb switches to 1 at the first election with an in-CB shooting and
	// stays 1 for all later elections
	replace cb_shoot = 0 if cb_shoot == .
	bysort cb (elec_yr): gen sht_cb = (sum(cb_shoot) >= 1)
	label variable sht_cb "In CB"
	bysort cb (elec_yr): egen any_cb = max(cb_shoot)
	tab sht_cb elec_yr if any_cb==1, m

	// make indicator for being in the same CBG as a CB with a shooting, from
	// the CBG's first shooting election onward
	bysort cbg (elec_yr): egen any_cbg = max(sht_cb)
	gen cb_shoot_yr0 = elec_yr if cb_shoot == 1
	bysort cbg (elec_yr): egen cbg_shoot_y0 = min(cb_shoot_yr0)
	assert cbg_shoot_y0 != . if any_cbg == 1 // check
	gen cbg_shoot = (any_cbg == 1 & elec_yr >= cbg_shoot_y0)
	tab cbg_shoot sht_cb, m
	replace cbg_shoot = 0 if sht_cb == 1 // set cbg_shoot = 0 if shooting in CB

	// fill missing with higher than 2.0 mile value
	gen dist = cond(distance == ., 3.0, distance)

	// make cumulative distance exposure over study period: the running
	// minimum of dist within CB, taken over elections up to the current one
	bysort cb (elec_yr): gen cum_min_dist = dist if _n == 1
	bysort cb (elec_yr): replace cum_min_dist = min(dist, cum_min_dist[_n-1]) if _n > 1
	replace cum_min_dist = round(cum_min_dist, 0.01)

	sort cb elec_yr
	order cb elec_yr dist cum_min_dist

	keep if sample_main==1
	keep cb cbg ct elec_yr all distance dist cum_min_dist sht_cb cb_shoot cbg_shoot cbg_shoot_y0
	save ../temp/distance_temp_`samp'.dta, replace
}

// make_sht_pt_vars: build mutually exclusive distance-band indicators from
// cum_min_dist for the data in memory.
//   argument 1 (optional): extra condition appended to every band, written
//                          with its leading "&" (e.g. "& all==1")
//   creates sht_pt10 ... sht_pt200: 1 if cum_min_dist lies in the 0.1-mile
//           band ending at i/100 miles, sht_cb==0, and the extra condition holds
//   creates sht_pt_ov200 = 1 - (sum of sht_cb and all bands)
//   renames sht_pt80 to sht_ref_pt80
cap prog drop make_sht_pt_vars
prog def make_sht_pt_vars
	args extra
	// cum_min_dist is created by -gen- without a storage type, so it is a
	// float unless -set type double- is in effect. Comparing a float with
	// decimal literals misplaces boundary values (a float holding 0.1 is
	// greater than the literal 0.1). cum_min_dist is already rounded to 0.01
	// earlier in this file, so compare integer hundredths of a mile instead.
	tempvar hund
	gen long `hund' = round(cum_min_dist*100)
	forval i=10(10)200 {
		local lo = `i' - 10
		// bot/top are only used for the variable label
		local j = round(`i'/100,0.1)
		local bot = string(round(`j'- 0.1,0.1))
		local top = string(round(`j',0.1))
		// the first band is closed at zero, matching its "[0, 0.1]" label
		local lower = cond(`i'==10, ">=", ">")
		di " round(cum_min_dist*100)`lower'`lo' & round(cum_min_dist*100)<=`i' & sht_cb==0 `extra'"
		gen sht_pt`i'= `hund' `lower' `lo' & `hund'<=`i' & sht_cb==0 `extra'
		cap label var sht_pt`i' "(`bot', `top']"
		// every band must contain at least one observation
		count if sht_pt`i'==1
		assert r(N)!=0
	}
	label var sht_pt10 "[0, 0.1]"
	// bands and the in-CB indicator must not overlap
	egen any_sht = rowtotal(sht_pt* sht_cb)
	assert any_sht<=1
	gen sht_pt_ov200= 1 - any_sht
	label var sht_pt_ov200 "(2,infinity)"
	// NOTE(review): the "ref" name suggests this band is the omitted
	// reference category downstream - confirm against the regression code
	ren sht_pt80 sht_ref_pt80 
end

// All-shootings sample, once per extra condition. The file suffix is taken
// from characters 3-5 of the condition: "all" and "cbg".
foreach v in "& all==1" "& cbg_shoot==0" {
	use ../temp/distance_temp_all.dta,clear
	local sub = substr("`v'",3,3)
	make_sht_pt_vars "`v'"
	keep cb elec_yr sht_*  cum_min_dist 
	save ../temp/data_`sub'.dta,replace
}
// repeat for media and no-media shootings with CBG FE model
foreach samp in media nomedia {
	use ../temp/distance_temp_`samp'.dta,clear
	make_sht_pt_vars
	keep cb elec_yr sht_*  cum_min_dist 
	save ../temp/data_`samp'.dta,replace
}

// define programs to check and rename sht_pt variables below
	cap prog drop check_sht_pt_vars
	prog def check_sht_pt_vars
		// Assert that, in every observation, sht_cb, the twenty distance
		// bands (with the 80 band under its sht_ref_pt80 name) and
		// sht_pt_ov200 add up to exactly 1.
		local total sht_cb
		forval i = 10(10)200 {
			if `i' == 80 {
				local total `total' + sht_ref_pt80
			}
			else {
				local total `total' + sht_pt`i'
			}
		}
		local total `total' + sht_pt_ov200
		assert `total'==1
	end

	cap prog drop ren_sht_pt_vars
	prog def ren_sht_pt_vars
		// Prefix sht_cb, every distance-band indicator and sht_pt_ov200
		// with the string given in name().
		syntax, name(str)
		local targets sht_cb
		forval i = 10(10)200 {
			if `i' == 80 {
				local targets `targets' sht_ref_pt80
			}
			else {
				local targets `targets' sht_pt`i'
			}
		}
		local targets `targets' sht_pt_ov200
		foreach old of local targets {
			rename `old' `name'`old'
		}
	end

// Merge all files together
// Start from the registration panel, attach the census variables, then add
// the distance-band indicators of each sample under its own prefix.
use ../data_intermediate/reg_2002_2016.dta, clear
merge m:1 cb using ../data_intermediate/main_census.dta, assert(match) nogenerate
keep reg cb CBG CB CT elec_yr pop_dec sample_main

// CBG sample: registration rows without a match are allowed but dropped
merge 1:1 cb elec_yr using ../temp/data_cbg.dta, assert(master match) keep(match) nogenerate
check_sht_pt_vars
ren_sht_pt_vars, name(CBG)

// media / no-media samples: every remaining row must match
foreach samp in media nomedia {
	local prefix = upper("`samp'")
	merge 1:1 cb elec_yr using ../temp/data_`samp'.dta, assert(match) nogenerate
	check_sht_pt_vars
	ren_sht_pt_vars, name(`prefix')
}

// all-shootings sample keeps its unprefixed sht_* names
merge 1:1 cb elec_yr using ../temp/data_all.dta, assert(match) nogenerate
save ../data/main_distance.dta, replace

